function cleandata = favarfillindata(data,Nfactors) ;
% function cleandata = favarfillindata(data,Nfactors)
%
% Fill in missing (NaN) entries of a data panel using a factor model.
%
% In general, many economic data series have missing data, particularly in
%  early years of the sample.  This function imputes those missing entries:
%  factors and loadings are extracted (via extract()) from the rows that are
%  completely observed, then each incomplete row is processed in turn, from
%  the last one back to the first.  For each such row the factors are
%  estimated from whatever entries are observed, and the missing entries are
%  replaced by (estimated factors) * (loadings of the missing series).
%  A row that has been filled in counts as complete when factors are
%  re-extracted for the next (earlier) incomplete row.
%
% Inputs:
%   data      matrix with one row per period and one column per series;
%             NaN marks a missing entry.
%   Nfactors  number of factors handed to extract().  If 0, data is
%             returned unchanged (missing entries stay missing).
%
% Output:
%   cleandata copy of data with NaN entries replaced by model imputations.
%             Non-NaN entries are never modified.
%
% Notes:
%   * Requires extract() and ols(), which are defined outside this file.
%   * Non-finite, non-NaN entries (+/-Inf) are not treated as missing, but
%     they are excluded from the observables used to estimate the factors
%     and rows containing them are not used by extract().
%   * Rows are handled in a loop rather than by recursion, so the number of
%     incomplete rows is not limited by the interpreter's recursion limit
%     and every row is visited exactly once.

% Threshold above which a matrix is considered too ill-conditioned for GLS:
condlimit = 10000 ;

cleandata = data ;

% If no factors, then leave missing data as missing:
if (Nfactors==0); return; end ;

% Rows with at least one missing entry, visited from last to first because
%  there are generally more series available for later months, hence more
%  data can be used at each step than when going first to last.
badmonths = find(any(isnan(cleandata),2)) ;

for t = flipud(badmonths(:)).' ;
  missing = isnan(cleandata(t,:)) ;    % entries to be imputed
  obs = isfinite(cleandata(t,:)) ;     % entries usable as observables

  % Nothing to condition on: leave the row alone rather than calling the
  %  estimators with empty inputs.
  if (~any(obs)) ;
    warning('favarfillindata:noObservables', ...
            'observation %i has no finite entries; left as missing', t) ;
    continue ;
  end ;

  % Extract factors from the rows without missing data (this includes rows
  %  filled in on earlier passes of the loop):
  goodmonths = find(all(isfinite(cleandata),2)) ;
  if (isempty(goodmonths)) ;
    error('favarfillindata:noCompleteRows', ...
          'no fully observed rows are available to extract factors from') ;
  end ;
  % (factors itself is not needed below; only loadings and errorvar are.)
  [factors,loadings,errorvar] = extract(cleandata(goodmonths,:), Nfactors) ;

  observables = cleandata(t,obs) ;
  obsloadings = loadings(:,obs) ;
  obserrorvar = errorvar(obs,obs) ;

  condnum = cond(obserrorvar) ;
  if (condnum>condlimit) ; % variance matrix of observables poorly conditioned
    fprintf(' filling in observation % 3i by OLS (cond1=%6.0f)\n',t,condnum) ;
    % presumably ols(y,X,0) returns the coefficients of y on X -- confirm
    factorest = ols(observables',obsloadings',0)' ; % then get factor by OLS
  else ;
    % Weighted cross-product of the loadings; only formed once the error
    %  variance is known to be reasonably conditioned.
    gram = obsloadings /obserrorvar *obsloadings' ;
    condnum2 = cond(gram) ;
    if (condnum2>condlimit) ;
      % Report the condition number that actually triggered this branch.
      fprintf(' filling in observation % 3i by OLS (cond2=%6.0f)\n',t,condnum2) ;
      factorest = ols(observables',obsloadings',0)' ; % then get factor by OLS
    else ; % otherwise, get factor by GLS:
      fprintf(' filling in observation % 3i by GLS (cond1=%6.1f)\n',t,condnum) ;
      factorest = (observables /obserrorvar *obsloadings') /gram ;
          % Note: this GLS routine should be improved numerically
    end ;
  end ;

  % fill in the missing entries of this row with model forecasts:
  cleandata(t,missing) = factorest *loadings(:,missing) ;
end ;

if (any(isnan(cleandata(:)))) ;
  warning('favarfillindata:stillMissing', ...
          'some missing entries could not be filled in') ;
else ;
  fprintf(' data is clean\n') ;
end ;

